options(scipen = 999)


library(tidyverse)
library(ggthemes)
library(lubridate)
library(maps)
library(usmap)
library(ggridges)
library(viridis)
library(hrbrthemes)
library(zoo)
library(geofacet)
library(arm)
library(lfe)
library(ggalt)
library(fixest)

pacman::p_load_gh('bbc/bbplot')

city.data <- read.csv('Data/city_data_minimal.csv')
ois.min <- read.csv('Data/OIS_Dataset_Minimal.csv')
weapon.mapping <- read.csv('Data/other_data/weapon_mapping.csv')
load("Data/lemas/2020/38651-0001-Data.rda")
lemas.2020 <- da38651.0001
rm('da38651.0001')
load("Data/lemas/2016/37323-0001-Data.rda")
lemas.2016 <- da37323.0001
rm('da37323.0001')
ucr.offenses <- readRDS('Data/UCR/offenses_known_yearly_1960_2021.rds')
dem.voteshare <- read.csv('Data/other_data/demVoteShareAllYearsFIPS.csv')
police.employees <- read.csv('Data/other_data/ucrPoliceEmployeeData.csv')

city.data$first.year <- rep(NA,dim(city.data)[1])
city.data$last.year <- rep(NA,dim(city.data)[1])
for(i in 1:nrow(city.data)){
  if(grepl('-',city.data$Range.Provided.By.City[i])){
    city.data$first.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[1]
    city.data$last.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[2]
  }
}
city.data$totalYears <- ifelse(!is.na(city.data$first.year), as.numeric(city.data$last.year)-as.numeric(city.data$first.year)+1,0)

ois.min$Weapon.Category <- weapon.mapping$Weapon.Category[match(ois.min$Suspect.Weapon, weapon.mapping$Suspect.Weapon)]
ois.min$Detailed.Weapon.Category <- weapon.mapping$Detailed.Weapon.Category[match(ois.min$Suspect.Weapon, weapon.mapping$Suspect.Weapon)]


ois.min <- ois.min %>% filter(Hit != 'Miss')


ois.counts <- ois.min %>%
  group_by(LEAR_ID) %>%
  summarize(n.shootings=n())

ois.yearly.counts <- ois.min %>%
  group_by(LEAR_ID,Year) %>%
  summarize(n.shootings=n(),
            n.shootings.fatal=sum(Fatal=='Fatal'))




years <- seq(from=2000,to=2021)
lears <- unique(city.data$LEAR_ID)
department.yearly.data <- expand.grid(lear=lears, year=years, stringsAsFactors=FALSE) %>%
  left_join(ois.yearly.counts, by=c("lear"='LEAR_ID', "year"='Year')) %>% 
  mutate(n.shootings = ifelse(is.na(n.shootings), 0, n.shootings)) %>%
  left_join(city.data %>% dplyr::select(ORI9,ORI7,STATE,LEAR_ID,FIPS,first.year,last.year), by=c('lear'='LEAR_ID')) %>%
  left_join(ucr.offenses %>% dplyr::select(population,actual_index_violent,ori,year),
            by=c('ORI7'='ori', 'year'='year'))  %>%
  mutate(first.year.index = as.numeric(first.year)) %>%
  mutate(last.year.index = as.numeric(last.year)) %>%
  filter(!is.na(first.year.index) & !is.na(last.year.index) & !is.na(year)) %>%
  filter(year>=first.year.index) %>%
  filter(year<=last.year.index) %>%
  mutate(shotPerCap = n.shootings/(population/100000),
         shotPerCapFatal = n.shootings.fatal/(population/100000) )

department.yearly.data %>%
  ggplot() +
  geom_point(aes(x=shotPerCap,y=shotPerCapFatal), alpha = .3) +
  geom_abline(slope=1, intercept = 0) +
  theme_tufte() +
  xlim(0,10) + ylim(0,10)


#################################################
#################################################
##
##    figure 7.1
##
#################################################
#################################################

lemas.2020$LEAR_ID <- as.integer(as.character(lemas.2020$AGENCYID)) 
pdf('FinalFigures/fig7-1.pdf',5,5)
ois.min %>%
  group_by(LEAR_ID) %>%
  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2020, by = "LEAR_ID") %>%
  filter(n.shot < 200) %>%
  ggplot() + 
  geom_label(aes(x = 110, y = .18, label = "Pittsburgh"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_label(aes(x = 150, y = .15, label = "Miami"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_label(aes(x = 158, y = .4, label = "Washington, DC"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_label(aes(x = 167, y = .53, label = "Dallas"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_point(aes(x=n.shot, y=fatality, size=PRIMARYPOP2020), alpha = .3) +
  theme_tufte() +
  theme(legend.position = "none",
        axis.line = element_line(color = 'black')) +
  labs(x='Total Shootings',y='Proportion Fatal',
       title='Total Police Shootings and Fatality Rate for All Cities') 
dev.off()



################################################
################################################
#############
############# Figure 7.2
#############
################################################
################################################

pdf('FinalFigures/fig7-2.pdf',10,16)
ois.min %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  mutate(race=ifelse(Suspect.Race %in% c('White','Black','Hispanic',
                                         'Asian'),
                     Suspect.Race, ifelse(Suspect.Race=='',
                                          'Unreported', 'Other'))) %>%
  mutate(race = factor(race,
                       levels = c('White','Black','Hispanic',
                                  'Asian',
                                  'Other',
                                  'Unreported'))) %>%
  filter(race %in% c('White','Black','Hispanic')) %>%
  group_by(LEAR_ID,race) %>%
  summarise(fatality = mean(fatal.num, na.rm=TRUE)) %>%
  pivot_wider(names_from = race, values_from = fatality) %>%
  filter(!is.na(White) & !is.na(Black) & !is.na(Hispanic)) %>%
  summarise(diff = White - Black) %>%
  arrange(diff) %>%
  left_join(ois.min %>% filter(Suspect.Race!='') %>% group_by(LEAR_ID) %>% summarise(n=n())) %>%
  left_join(city.data %>% dplyr::select(LEAR_ID,fullLoc)) %>%
  mutate(n.alpha = ecdf(n)(n)) %>%
  ggplot() +
  geom_bar(aes(x = as.factor(reorder(fullLoc,diff)), y = diff, alpha=n.alpha), 
           stat = "identity",
           show.legend = FALSE) +
  coord_flip()  +
  bbc_style() +
  labs(title='White-Black Fatality Differences',
       subtitle='Difference between White and Black subjects\'\naverage fatality rates in cities',
       y='White-Black Fatality Rate',
       x='') +
  theme(plot.caption = element_text(hjust = 0)) +
  theme(axis.title.x = element_text(size = 20), 
        axis.title.y = element_text(size = 20),
        # axis.text.y = element_blank(),
        axis.line=element_line(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank()) +
  theme(legend.position = 'none')
dev.off()





pdf('Figures/Fatality/fatalProportionHist.pdf',12,10)
ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  ggplot(aes(fatality, color = n.shot, fill = as.factor(n.shot))) +
  geom_histogram(binwidth = .025,position = position_stack(reverse = TRUE)) +
  #scale_colour_gradient(name = "fatality", 
  #                      low = "blue", high = "red") +
  scale_fill_viridis_d(direction = -1,option = "A") +
  bbc_style() +
  labs(title='Fatality Rates in Officer-Involved Shootings',
       subtitle = 'Fatality rate for each city in our data.\nDarker colors indicate cities with more shootings',
       caption = 'Note: Data exclude cities that did not provide fatality data at all.\nPartially missing fatality data within a city is ignored.') +
  theme(plot.caption = element_text(hjust = 0), legend.position = 'none') 
dev.off()

# pdf('Figures/Fatality/fatalProportionHist.pdf',5,5)
# ois.min %>%
#   mutate(fatal.numeric = ifelse(Fatal=="Fatal",1,ifelse(Fatal=="Nonfatal",0,NA))) %>%
#   group_by(City,State) %>%
#   summarise(fatal.prop = mean(fatal.numeric, na.rm=TRUE)) %>%
#   ggplot() + geom_histogram(aes(x=fatal.prop)) + 
#   bbc_style() +
#   labs(title='Fatality Rates in Police Shootings',
#        subtitle='Proportion of suspects shot in each city that is\nfatallty wounded',
#        x='Percent of the Shootings that are Fatal',
#        caption = 'Note: Proportions are for each city. Some cities have very few shootings.') +
#   theme(plot.caption = element_text(hjust = 0)) +
#   theme(axis.title.x = element_text(size = 20), 
#         axis.title.y = element_text(size = 20),
#         axis.line=element_line()) +
#   theme(legend.position = 'none')
# dev.off()

ois.min %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  group_by(LEAR_ID,Year) %>%
  summarize(fatal.mean = mean(fatal.num, na.rm=TRUE)) %>%
  ggplot(aes(x=Year,y=fatal.mean))  + 
  geom_boxplot() +
  xlim(2000,2020)


########### model of fatality with chapter 4 covariates
fatality.ind <- ois.min %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  mutate(race=ifelse(Suspect.Race %in% c('White','Black','Hispanic',
                                         'Asian'),
                     Suspect.Race, ifelse(Suspect.Race=='',
                                          'Unreported', 'Other'))) %>%
  mutate(race = factor(race,
                       levels = c('White','Black','Hispanic',
                                  'Asian',
                                  'Other',
                                  'Unreported'))) %>%
  left_join(city.data %>% dplyr::select(ORI9,ORI7,STATE,LEAR_ID,FIPS,first.year,last.year), by=c('LEAR_ID'='LEAR_ID')) %>%
  left_join(ucr.offenses %>% dplyr::select(population,actual_index_violent,ori,year),
            by=c('ORI7'='ori', 'Year'='year'))  %>%
  # mutate(first.year.index = as.numeric(first.year)) %>%
  # mutate(last.year.index = as.numeric(last.year)) %>%
  # filter(!is.na(first.year.index) & !is.na(last.year.index) & !is.na(year)) %>%
  # filter(year>=first.year.index) %>%
  # filter(year<=last.year.index) %>%
  mutate(violPerCap = actual_index_violent/(population/100000),
         # shotPerCap = n.shootings/(population/100000),
         pop100k = population/100000) %>%
  left_join(dem.voteshare %>% dplyr::select(county_fips,year,demshare),
            by=c('FIPS'='county_fips', 'Year'='year')) %>%
  left_join(police.employees %>% dplyr::select(ORI7,year,total.employees,total.officers,total.civilians),
            by = c('ORI7' = 'ORI7', 'Year'='year')) %>%
  mutate(copsPerCap = total.officers/(population/100000)) %>%
  mutate(armed.total = ifelse(Weapon.Category %in% 
                                c("Vehicle","Gun","Sharp","Blunt","Multiple","Other","Explosives"),1,
                              ifelse(Weapon.Category %in% c('Unarmed','Unknown'),0,NA))) 


fatal_pois.no.fe = fepois(fatal.num ~ 
                            rescale(actual_index_violent)+rescale(demshare)+
                            rescale(total.officers)+
                            rescale(pop100k), fatality.ind)
summary(fatal_pois.no.fe)
fatal_pois = fepois(fatal.num ~ race +
                      rescale(actual_index_violent)+rescale(demshare)+
                      rescale(total.officers)+
                      rescale(pop100k)| LEAR_ID+Year, fatality.ind)
summary(fatal_pois)
fatal_pois.armed = fepois(fatal.num ~ race +
                            armed.total+
                            rescale(actual_index_violent)+rescale(demshare)+
                            rescale(total.officers)+
                            rescale(pop100k)| LEAR_ID+Year, fatality.ind)
summary(fatal_pois.armed)


########### fatality by armed status

fu <- ois.min %>% 
  mutate(coarse.weapon = ifelse(Weapon.Category %in% c('Sharp','Other','Multiple','Explosives','Blunt'),
                                'Other',Weapon.Category)) %>%
  filter(Fatal %in% c('Fatal','Nonfatal')) %>%
  filter(coarse.weapon != '')

table(fu$coarse.weapon,fu$Fatal)
prop.table(table(fu$coarse.weapon,fu$Fatal),margin=1)




############ fatality by hiring/training

## written aptitude test for hiring
ois.min %>%
  group_by(LEAR_ID) %>%
  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_APTEST) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

## problem solving assessment
ois.min %>%
  group_by(LEAR_ID) %>%
  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_PROBSOLV) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

ois.min %>%
  group_by(LEAR_ID) %>%
  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_CULTURE) %>% 
  summarize(value = mean(fatality, na.rm = T)) 


ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_CONFLICT) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

## social media check
ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_SOCMED) %>% 
  summarize(value = mean(fatality, na.rm = T)) 


## psychological test
ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_PERSTEST) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

## psychological interview
ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(PERS_PSYCH) %>% 
  summarize(value = mean(fatality, na.rm = T)) 




## difference in fatality rates by race and armed status
ois.min %>% 
  mutate(armed.total = ifelse(Weapon.Category %in% 
                                c("Vehicle","Gun","Sharp","Blunt","Multiple","Other","Explosives"),1,
                              ifelse(Weapon.Category %in% c('Unarmed','Unknown'),0,NA))) %>%
  mutate(race=ifelse(Suspect.Race %in% c('White','Black','Hispanic',
                                         'Asian'),
                     Suspect.Race, ifelse(Suspect.Race=='',
                                          'Unreported', 'Other'))) %>%
  mutate(race = factor(race,
                       levels = c('White','Black','Hispanic',
                                  'Asian',
                                  'Other',
                                  'Unreported'))) %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  group_by(armed.total, race) %>% 
  summarize(value = mean(fatal.num, na.rm = T)) %>% 
  pivot_wider(names_from = armed.total)

prop.table(table(ois.min$Weapon.Category[ois.min$Suspect.Race=='Black'],ois.min$Fatal[ois.min$Suspect.Race=='Black']),margin=1)
prop.table(table(ois.min$Weapon.Category[ois.min$Suspect.Race=='White'],ois.min$Fatal[ois.min$Suspect.Race=='White']),margin=1)

ois.min$armed.total <- ifelse(ois.min$Weapon.Category %in% 
                                c("Vehicle","Gun","Sharp","Blunt","Multiple","Other","Explosives"),1,
                              ifelse(ois.min$Weapon.Category %in% c('Unarmed','Unknown'),0,NA))

prop.table(table(ois.min$armed.total[ois.min$Suspect.Race=='Black'],ois.min$Fatal[ois.min$Suspect.Race=='Black']),margin=1)
prop.table(table(ois.min$armed.total[ois.min$Suspect.Race=='White'],ois.min$Fatal[ois.min$Suspect.Race=='White']),margin=1)

fatality.ind$age.num <- as.numeric(fatality.ind$Suspect.Age)
fatal_pois.race = fepois(fatal.num ~ race | LEAR_ID+Year, fatality.ind)
summary(fatal_pois.race)
fatal_pois.armed = fepois(fatal.num ~ race +
                            armed.total+
                            rescale(actual_index_violent)+rescale(demshare)+
                            rescale(total.officers)+
                            rescale(pop100k)| LEAR_ID+Year, fatality.ind)
summary(fatal_pois.armed)
fatal_pois.armed.age = fepois(fatal.num ~ race +
                                armed.total+age.num+
                                rescale(actual_index_violent)+rescale(demshare)+
                                rescale(total.officers)+
                                rescale(pop100k)| LEAR_ID+Year, fatality.ind)
summary(fatal_pois.armed.age)

summary(fepois(fatal.num ~race+armed.total| LEAR_ID, fatality.ind))


fu <- read.csv('../../Data/Mapping Police Violence/mpvData.csv')

table(fu$Official.disposition.of.death..justified.or.other.)

fu$simplified.just <- ifelse(grepl('Justified by',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'justified',fu$Official.disposition.of.death..justified.or.other.)


fu$simplified.just <- ifelse(grepl('Charged',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'charged',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Indicted',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'charged',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Cleared',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'justified',fu$simplified.just)

fu$simplified.just <- ifelse(grepl('Justified',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'justified',fu$simplified.just)

fu$simplified.just <- ifelse(grepl('Pending',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'pending',fu$simplified.just)

fu$simplified.just <- ifelse(grepl('discipline',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'discipline',fu$simplified.just)

fu$simplified.just <- ifelse(grepl('awarded',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'award',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('awared',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'award',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Suicide',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'other',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Unjustified',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'unjustified',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('accidental',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'accidental',fu$simplified.just)
fu$simplified.just <- ifelse(fu$Official.disposition.of.death..justified.or.other.=='Ruled accidental',
                             'accidental',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('investigation',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'pending',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Excusable',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'justified',fu$simplified.just)
fu$simplified.just <- ifelse(grepl('Unreported',fu$Official.disposition.of.death..justified.or.other.,ignore.case = TRUE),
                             'unknown',fu$simplified.just)
fu$simplified.just <- ifelse(fu$Official.disposition.of.death..justified.or.other.=='',
                             'unknown',fu$simplified.just)
fu$simplified.just <- ifelse(fu$Official.disposition.of.death..justified.or.other.=='Unknown',
                             'unknown',fu$simplified.just)
fu$simplified.just <- ifelse(fu$Official.disposition.of.death..justified.or.other.=='Civil Suit',
                             'award',fu$simplified.just)
fu$simplified.race <- ifelse(fu$Victim.s.race=='Black;Hispanic','Black',
                             ifelse(fu$Victim.s.race %in% 
                                      c('Native American','Native American;Hispanic',
                                        'Pacific Islander'),'Other',
                                    ifelse(fu$Victim.s.race %in%
                                             c('','Unknown race','Unknown Race'),'Unknown',
                                           fu$Victim.s.race)))
prop.table(table(fu$simplified.race[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE) & fu$simplified.just!='pending' & fu$simplified.just!='unknown'],fu$simplified.just[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE) & fu$simplified.just!='pending' & fu$simplified.just!='unknown']),margin=1)
table(fu$simplified.race[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE)],fu$simplified.just[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE)])

prop.table(table(fu$simplified.race[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE) & fu$simplified.just!='pending' & fu$simplified.just!='unknown'],fu$simplified.just[grepl('Gunshot',fu$Cause.of.death,ignore.case=TRUE) & fu$simplified.just!='pending' & fu$simplified.just!='unknown']),margin=1)





ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(CP_TRN_INSRV) %>% 
  summarize(value = mean(fatality, na.rm = T))

ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(POL_INV_INJRY) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

fu<-ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(POL_INV_DCHG_GUN) %>% 
  summarize(value = mean(fatality, na.rm = T)) 

table(fu$POL_INV_DCHG_GUN)
t.test(fu$fatality~fu$POL_CCRB)
ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(lemas.2016 %>% mutate(LEAR_ID=as.numeric(as.character(LEAR_ID)))) %>%
  group_by(POL_CCRB) %>% 
  summarize(value = mean(fatality, na.rm = T)) 




fatal_pois = fepois(fatal.num ~ race | LEAR_ID+Year, fatality.ind)



city.fatality <- ois.min %>%
  group_by(LEAR_ID) %>%
  #  filter(Fatal != '') %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  summarize(n.shot=n(),fatality=mean(fatal.num, na.rm=TRUE)) %>%
  filter(!is.na(fatality)) %>%
  left_join(city.data)







ois.min %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  mutate(race=ifelse(Suspect.Race %in% c('White','Black','Hispanic',
                                         'Asian'),
                     Suspect.Race, ifelse(Suspect.Race=='',
                                          'Unreported', 'Other'))) %>%
  mutate(race = factor(race,
                       levels = c('White','Black','Hispanic',
                                  'Asian',
                                  'Other',
                                  'Unreported'))) %>%
  filter(race %in% c('White','Black','Hispanic')) %>%
  group_by(LEAR_ID,race) %>%
  summarise(fatality = mean(fatal.num, na.rm=TRUE)) %>%
  ggplot() +
  geom_density(aes(x=fatality, group=race, fill=race), alpha=0.5)
geom_histogram(aes(x=fatality)) +facet_grid(.~race)

fu <- ois.min %>%
  mutate(fatal.num = ifelse(Fatal=='Fatal',1,ifelse(Fatal=='Nonfatal',0,NA))) %>%
  mutate(race=ifelse(Suspect.Race %in% c('White','Black','Hispanic',
                                         'Asian'),
                     Suspect.Race, ifelse(Suspect.Race=='',
                                          'Unreported', 'Other'))) %>%
  mutate(race = factor(race,
                       levels = c('White','Black','Hispanic',
                                  'Asian',
                                  'Other',
                                  'Unreported'))) %>%
  filter(race %in% c('White','Black','Hispanic')) %>%
  group_by(LEAR_ID,race) %>%
  summarise(fatality = mean(fatal.num, na.rm=TRUE)) 

t.test(fu$fatality[fu$race=='Black'],fu$fatality[fu$race=='White'])
t.test(fu$fatality[fu$race=='Hispanic'],fu$fatality[fu$race=='White'])


#################################################
#################################################
##
##    Comparing Data with and without Nonfatal shootings
##
#################################################
#################################################


pdf('Figures/Fatality/shootingsVFatalShootingsWWaPoCutoff.pdf',7,7)
ois.min %>%
  group_by(LEAR_ID) %>%
  summarise(n=n(),
            fatality=sum(ifelse(Fatal=='Fatal', 1, ifelse(Fatal=='Nonfatal', 0, NA)), na.rm=TRUE)) %>%
  ggplot() + 
  geom_point(aes(y=jitter(n), x=jitter(fatality), color=(fatality>5)), size=1.5) + 
  scale_color_manual(values = c("black","grey")) +
  xlim(0,50) + ylim(0,100) +
  geom_abline(intercept = 0) +
  bbc_style() +
  labs(title='Fatal and Non-Fatal Shootings',
       subtitle='Comparison of number of fatal and\nnon-fatal shootings in cities',
       x='Total Fatal Shootings',
       y='Total Shootings',
       caption = 'Note: Numbers show each city in our data.') +
  theme(plot.caption = element_text(hjust = 0)) +
  theme(axis.title.x = element_text(size = 20), 
        axis.title.y = element_text(size = 20),
        axis.line=element_line()) +
  theme(legend.position = 'none')
# geom_vline(xintercept = 5, col='red') +
dev.off()


ois.min %>%
  mutate(fatal.numeric = ifelse(Fatal=="Fatal",1,ifelse(Fatal=="Nonfatal",0,NA))) %>%
  group_by(LEAR_ID) %>%
  summarise(fatal.prop = mean(fatal.numeric, na.rm=TRUE)) %>%
  left_join(lemas.2020, by=c('LEAR_ID'='AGENCYID')) %>%
  ggplot() +
  geom_point(aes(x=RES_POP,y=fatal.prop))
geom_smooth(aes(x=RES_POP,y=fatal.prop))


left_join(lemas.2016 %>% dplyr::select(ORI9,OPBUDGET,POPSERVED,FTSWORN,PERS_TRN_INSVC)) %>%
  filter(!is.na(OPBUDGET)) %>%
  mutate(early = ifelse(year>2010 & year < 2016, 'Early',
                        ifelse(year>2015 & year < 2021, 'Late',NA))) %>%
  filter(!is.na(early)) %>%
  group_by(lear,early) %>%
  summarize(n=mean(n.shootings),
            opbudgTotal=mean(OPBUDGET/1000000),
            opbudgPerRes=mean(OPBUDGET/POPSERVED),
            opbudgPerCop=mean(OPBUDGET/(FTSWORN))/100,
            FTSWORN=mean(FTSWORN)) %>%
  
  
  
  
  ois.min$Weapon.Category <- weapon.mapping$Weapon.Category[match(ois.min$Suspect.Weapon, weapon.mapping$Suspect.Weapon)]
ois.min$Detailed.Weapon.Category <- weapon.mapping$Detailed.Weapon.Category[match(ois.min$Suspect.Weapon, weapon.mapping$Suspect.Weapon)]

ois.min <- ois.min %>% filter(Hit != 'Miss')

ois.min %>%
  mutate(suspect.age.num = as.numeric(Suspect.Age)) %>%
  group_by(Suspect.Race) %>%
  summarise(mean.age=mean(suspect.age.num, na.rm=TRUE), 
            sd.age=sd(suspect.age.num, na.rm=TRUE))

ois.min %>%
  filter(Suspect.Race %in% c('White','Black')) %>%
  mutate(suspect.age.num = as.numeric(Suspect.Age)) %>%
  filter(!is.na(suspect.age.num)) %>%
  summarise(ttest = list(t.test(suspect.age.num ~ Suspect.Race, var.equal = TRUE))) 

fatality.data <- ois.min %>%
  group_by(State) %>%
  summarise(Responded=mean(ifelse(Fatal=='Fatal', 1, 
                                  ifelse(Fatal=='Nonfatal',0,NA)), na.rm=TRUE))

have.data <- as.data.frame(fatality.data) 
MainStates <- map_data("state")
abb.walk <- as.data.frame(cbind(state.abb,tolower(state.name)))
have.data.data <- left_join(abb.walk,have.data, by = c('state.abb' = 'State'))
names(have.data.data) <- c('state.abb','state','Responded') 
pdf('Figures/Fatality/fatalityRateMap.pdf',5,5)
plot_usmap(data = have.data.data, values = 'Responded') +
  scale_fill_continuous(name = "Proportion",label = scales::comma,
                        limits = c(0,1)) +
  labs(title='Fatality By State') +
  theme(legend.position = "bottom")
dev.off()


pdf('Figures/Preliminary Figures/fatalityRateDensity.pdf',5,5)
ois.city %>%
  group_by(LEAR_ID) %>%
  summarise(fatality=mean(ifelse(Fatal=='Fatal', 1, 
                                 ifelse(Fatal=='Nonfatal',0,NA)), na.rm=TRUE),
            n=n()) %>%
  ggplot() +
  geom_density(aes(x=fatality,weights=n)) +
  theme_igray() +
  xlab('Fatality Rate') +
  ylab('Density')
dev.off()

# PERS_FEMALE
# PERS_MALE
# 
# PERS_BLACK_FEM
# PERS_BLACK_MALE
# 
# PERS_WHITE_FEM
# PERS_WHITE_MALE


pdf('Figures/Preliminary Figures/fatalityVWhiteBlackRatio.pdf',5,5)
ois.with.lemas %>%
  group_by(LEAR_ID) %>%
  summarize(n=n(),
            fatality=mean(ifelse(Fatal=='Fatal', 1, 
                                 ifelse(Fatal=='Nonfatal',0,NA)), na.rm=TRUE),
            white.black.ratio = mean((PERS_WHITE_FEM+PERS_WHITE_MALE)/(PERS_BLACK_FEM+PERS_BLACK_MALE), na.rm=TRUE)) %>%
  ggplot() +
  geom_smooth(aes(x=white.black.ratio, y=fatality)) +
  geom_point(aes(x=white.black.ratio, y=fatality)) +
  theme_igray() +
  xlab('Ratio of White to Black Officers') +
  ylab('Fatality Rate') + ylim(0,1)
dev.off()  

pdf('Figures/Preliminary Figures/fatalityMaleFemaleRatio.pdf',5,5)
ois.with.lemas %>%
  group_by(LEAR_ID) %>%
  summarize(n=n(),
            fatality=mean(ifelse(Fatal=='Fatal', 1, 
                                 ifelse(Fatal=='Nonfatal',0,NA)), na.rm=TRUE),
            male.female.ratio = mean((PERS_BLACK_MALE+PERS_WHITE_MALE)/(PERS_BLACK_FEM+PERS_WHITE_FEM), na.rm=TRUE)) %>%
  ggplot() +
  geom_smooth(aes(x=male.female.ratio, y=fatality)) +
  geom_point(aes(x=male.female.ratio, y=fatality)) +
  theme_igray() +
  xlab('Ratio of Male to Female Officers') +
  ylab('Fatality Rate') + ylim(0,1)
dev.off()  






coords <- ois.city %>%
  left_join(.,city.metadata, by=c("CITY"="city_ascii", "State.x"='state_id')) %>%
  group_by(LEAR_ID) %>%
  mutate(fatality=mean(ifelse(Fatal=='Fatal', 1, 
                              ifelse(Fatal=='Nonfatal',0,NA)), na.rm=TRUE)) %>%
  mutate(totalYears = ifelse(!is.na(first.year), as.numeric(last.year)-as.numeric(first.year)+1,0)) %>%
  select(lat,lng,fatality,totalYears) %>%
  filter(!is.na(lat)) %>%
  filter(!is.na(fatality)) %>%
  usmap_transform(input_names = c('lng','lat'))

pdf('Figures/Preliminary Figures/fatalityRateCityMap.pdf',5,5)
plot_usmap() +
  scale_color_gradient2(low="blue", high="red", 
                        limits = c(0,1), midpoint = 0.5) +
  geom_point(data=coords,aes(x = x, y = y, 
                             colour = fatality, size=totalYears), alpha=.5) +
  theme(legend.position='bottom') +
  labs(size="Years of Data",colour="Fatality Rate") +
  scale_size_binned(breaks=c(1,5,0,15,20))
dev.off()

pdf('Figures/Preliminary Figures/shootingsVFatalShootings.pdf',5,5)
ois.min %>%
  group_by(City,Year) %>%
  summarise(n=n(),
            fatality=sum(ifelse(Fatal=='Fatal', 1, ifelse(Fatal=='Nonfatal', 0, NA)), na.rm=TRUE)) %>%
  ggplot() + 
  geom_point(aes(y=n, x=fatality)) + 
  xlim(0,100) + ylim(0,100) +
  geom_abline(intercept = 0) +
  xlab('Total Fatal Shootings') +
  ylab('Total Shootings') +
  theme_igray()
dev.off()



